Huffman Coding
The core of Huffman coding is the Huffman tree, which is a binary tree that represents the optimal prefix-free binary codes for data compression.
This technique is widely used in various digital storage formats, including JPEG and MPEG-2, as it effectively reduces the amount of storage required.
Problem
Given a set of characters along with their frequency or count, the goal is to use the Huffman tree, which employs a greedy approach, to generate a unique codeword for each character.
The process of converting the characters into their corresponding codewords is known as encoding.
Input
Given a set of characters along with their frequency or count values.
Output
The codeword for each character.
Example:
- A - 110
- B - 111
- C - 001
Types of Huffman Encoding
1. Variable Length Encoding
In this type of encoding, each character is assigned a codeword of different lengths based on its frequency.
2. Fixed Length Encoding
In this type, each character is assigned a codeword of the same length.
Huffman Tree Construction and Min Heap
The Huffman tree is constructed in a bottom-up manner, where all given characters are stored in the leaves of the tree.
The tree is built using the following two steps:
- Huffman Tree Construction: The tree is constructed using the frequency values of the characters.
- Assigning Codewords: Once the tree is constructed, each character is assigned its corresponding codeword.
To facilitate the construction of the Huffman tree, a Min Heap data structure is used.
The Min Heap acts as a priority queue, where the value of the parent node is always less than or equal to the values of its child nodes.
The value of a parent node in the Huffman tree is the sum of its children's values.
Video Lecture
Huffman Coding - Example
Huffman Coding Algorithm
HuffmanCoding(A[0]..A[n-1])
Input: Array A containing characters and their frequencies.
Output: A prefix code for each character, minimizing the total encoded length.
Step 1: Start.
Step 2: Create a priority queue (min-heap) where each node represents a character and its frequency.
Step 3: While there is more than one node in the priority queue, do the following:
Step 4: Extract the two nodes with the lowest frequencies from the priority queue.
Step 5: Create a new internal node with a frequency equal to the sum of the two nodes' frequencies. The new node becomes the parent of the two nodes.
Step 6: Insert the new node back into the priority queue.
Step 7: The remaining node in the priority queue is the root of the Huffman tree. Traverse the tree to assign codes to characters.
Step 8: Stop.
Huffman Coding code
#include <stdio.h>
#include <stdlib.h>
// Node of the MinHeap
struct MinHNode {
char character;
unsigned freq;
struct MinHNode *left, *right;
};
// MinHeap structure
struct MinHeap {
unsigned size;
unsigned capacity;
struct MinHNode **array;
};
// Function to create a new MinHNode
struct MinHNode* newNode(char character, unsigned freq) {
struct MinHNode* node = (struct MinHNode*)malloc(sizeof(struct MinHNode));
node->left = node->right = NULL;
node->character = character;
node->freq = freq;
return node;
}
// Function to create a MinHeap
struct MinHeap* createMinH(unsigned capacity) {
struct MinHeap* minHeap = (struct MinHeap*)malloc(sizeof(struct MinHeap));
minHeap->size = 0;
minHeap->capacity = capacity;
minHeap->array = (struct MinHNode**)malloc(minHeap->capacity * sizeof(struct MinHNode*));
return minHeap;
}
// Function to swap two MinHNode pointers
void swapMinHNode(struct MinHNode** a, struct MinHNode** b) {
struct MinHNode* t = *a;
*a = *b;
*b = t;
}
// Function to heapify a subtree rooted with node idx
void minHeapify(struct MinHeap* minHeap, int idx) {
int smallest = idx;
int left = 2 * idx + 1;
int right = 2 * idx + 2;
if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq)
smallest = left;
if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq)
smallest = right;
if (smallest != idx) {
swapMinHNode(&minHeap->array[smallest], &minHeap->array[idx]);
minHeapify(minHeap, smallest);
}
}
// Function to check if size of heap is 1
int checkSizeOne(struct MinHeap* minHeap) {
return minHeap->size == 1;
}
// Function to extract the minimum node from the heap
struct MinHNode* extractMin(struct MinHeap* minHeap) {
struct MinHNode* temp = minHeap->array[0];
minHeap->array[0] = minHeap->array[minHeap->size - 1];
--minHeap->size;
minHeapify(minHeap, 0);
return temp;
}
// Function to insert a new node into the MinHeap
void insertMinHeap(struct MinHeap* minHeap, struct MinHNode* minHeapNode) {
++minHeap->size;
int i = minHeap->size - 1;
while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) {
minHeap->array[i] = minHeap->array[(i - 1) / 2];
i = (i - 1) / 2;
}
minHeap->array[i] = minHeapNode;
}
// Function to build the MinHeap
void buildMinHeap(struct MinHeap* minHeap) {
int n = minHeap->size - 1;
for (int i = (n - 1) / 2; i >= 0; --i)
minHeapify(minHeap, i);
}
// Function to check if a node is a leaf node
int isLeaf(struct MinHNode* root) {
return !root->left && !root->right;
}
// Function to create and build a MinHeap from characters and their frequencies
struct MinHeap* createAndBuildMinHeap(char items[], int freq[], int size) {
struct MinHeap* minHeap = createMinH(size);
for (int i = 0; i < size; ++i)
minHeap->array[i] = newNode(items[i], freq[i]);
minHeap->size = size;
buildMinHeap(minHeap);
return minHeap;
}
// Function to build the Huffman Tree
struct MinHNode* buildHuffmanTree(char characters[], int frequency[], int size) {
struct MinHNode *left, *right, *top;
struct MinHeap* minHeap = createAndBuildMinHeap(characters, frequency, size);
while (!checkSizeOne(minHeap)) {
left = extractMin(minHeap);
right = extractMin(minHeap);
top = newNode('$', left->freq + right->freq);
top->left = left;
top->right = right;
insertMinHeap(minHeap, top);
}
return extractMin(minHeap);
}
// Function to print Huffman codes from the root of the Huffman Tree
void printHuffmanCodes(struct MinHNode* root, int arr[], int top) {
if (root->left) {
arr[top] = 0;
printHuffmanCodes(root->left, arr, top + 1);
}
if (root->right) {
arr[top] = 1;
printHuffmanCodes(root->right, arr, top + 1);
}
if (isLeaf(root)) {
printf("%c: ", root->character);
for (int i = 0; i < top; ++i)
printf("%d", arr[i]);
printf("\n");
}
}
// Driver code
int main() {
char items[] = {'a', 'b', 'c', 'd', 'e', 'f'};
int freq[] = {5, 9, 12, 13, 16, 45};
int size = sizeof(items) / sizeof(items[0]);
struct MinHNode* root = buildHuffmanTree(items, freq, size);
int arr[100];
printHuffmanCodes(root, arr, 0);
return 0;
}
#include <iostream>
#include <queue>
#include <unordered_map>
#include <vector>
using namespace std;
// A Huffman Tree node
struct Node {
char ch;
int freq;
Node *left, *right;
Node(char ch, int freq) {
left = right = nullptr;
this->ch = ch;
this->freq = freq;
}
};
// Comparison object to be used to order the heap
struct compare {
bool operator()(Node* left, Node* right) {
return left->freq > right->freq;
}
};
// Print Huffman codes from the root of Huffman Tree.
void printCodes(struct Node* root, string str) {
if (!root)
return;
if (root->ch != '$')
cout << root->ch << ": " << str << "\n";
printCodes(root->left, str + "0");
printCodes(root->right, str + "1");
}
// The main function that builds a Huffman Tree and print codes
void HuffmanCodes(char data[], int freq[], int size) {
struct Node *left, *right, *top;
// Create a min heap & inserts all characters of data[]
priority_queue<Node*, vector<Node*>, compare> minHeap;
for (int i = 0; i < size; ++i)
minHeap.push(new Node(data[i], freq[i]));
// Iterate while size of heap doesn't become 1
while (minHeap.size() != 1) {
// Extract the two minimum freq items from heap
left = minHeap.top();
minHeap.pop();
right = minHeap.top();
minHeap.pop();
// Create a new internal node with frequency equal to the sum of the two nodes frequencies.
top = new Node('$', left->freq + right->freq);
top->left = left;
top->right = right;
minHeap.push(top);
}
// Print Huffman codes using the Huffman tree built above
printCodes(minHeap.top(), "");
}
int main() {
char arr[] = { 'a', 'b', 'c', 'd', 'e', 'f' };
int freq[] = { 5, 9, 12, 13, 16, 45 };
int size = sizeof(arr) / sizeof(arr[0]);
HuffmanCodes(arr, freq, size);
return 0;
}
import java.util.PriorityQueue;
import java.util.Comparator;
class Node {
char ch;
int freq;
Node left, right;
Node(char ch, int freq) {
this.ch = ch;
this.freq = freq;
left = right = null;
}
}
class HuffmanCoding {
public static void printCodes(Node root, String str) {
if (root == null)
return;
if (root.ch != '$')
System.out.println(root.ch + ": " + str);
printCodes(root.left, str + "0");
printCodes(root.right, str + "1");
}
public static void buildHuffmanTree(char[] data, int[] freq, int size) {
PriorityQueue<Node> minHeap = new PriorityQueue<>(size, Comparator.comparingInt(node -> node.freq));
for (int i = 0; i < size; i++) {
minHeap.add(new Node(data[i], freq[i]));
}
while (minHeap.size() > 1) {
Node left = minHeap.poll();
Node right = minHeap.poll();
Node top = new Node('$', left.freq + right.freq);
top.left = left;
top.right = right;
minHeap.add(top);
}
printCodes(minHeap.peek(), "");
}
public static void main(String[] args) {
char[] arr = { 'a', 'b', 'c', 'd', 'e', 'f' };
int[] freq = { 5, 9, 12, 13, 16, 45 };
int size = arr.length;
buildHuffmanTree(arr, freq, size);
}
}
import heapq
from collections import defaultdict, namedtuple
class Node(namedtuple("Node", ["char", "freq"])):
def __lt__(self, other):
return self.freq < other.freq
def build_huffman_tree(data, freq):
heap = [Node(ch, freq[ch]) for ch in data]
heapq.heapify(heap)
while len(heap) > 1:
left = heapq.heappop(heap)
right = heapq.heappop(heap)
node = Node('$', left.freq + right.freq)
node.left = left
node.right = right
heapq.heappush(heap, node)
return heap[0]
def print_codes(root, code=""):
if root is None:
return
if root.char != '$':
print(f"{root.char}: {code}")
print_codes(getattr(root, 'left', None), code + "0")
print_codes(getattr(root, 'right', None), code + "1")
def huffman_coding(data, freq):
root = build_huffman_tree(data, freq)
print_codes(root)
# Example usage
data = ['a', 'b', 'c', 'd', 'e', 'f']
freq = {'a': 5, 'b': 9, 'c': 12, 'd': 13, 'e': 16, 'f': 45}
huffman_coding(data, freq)
Time Complexity of Huffman Code
Time Complexity Basic Operation: Insertion and Selection
Analysis
The time complexity of the Huffman algorithm is O(n \log n). Using a heap to store the weight of each tree, each iteration requires O(\log n) time to determine the smallest weight and insert the new weight. There are O(n) iterations, one for each item. Therefore, the overall time complexity of the algorithm is:
T(n) = O(n) * O(log n) = O(n log n)